package com.zhen.tika.demo;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Demo that extracts the plain-text body of every file in a folder using Apache Tika.
 *
 * <p>Created with IntelliJ IDEA.
 * User: zhen-desktop
 * Date: 2019/1/9
 * Time: 20:24
 */
public class ParserExtraction {

    /**
     * Parses each regular file directly under {@code chapter2/TikaDemo/files} and prints
     * the file name followed by the text extracted from it.
     *
     * <p>If the folder is missing, is not a directory, or cannot be listed, a message is
     * printed and the method returns without parsing anything.
     *
     * @param args command-line arguments (unused)
     * @throws IOException   if a file cannot be opened or read
     * @throws TikaException if Tika fails to parse a document
     * @throws SAXException  if SAX event handling fails (for example when the content
     *                       handler's write limit is exceeded)
     */
    public static void main(String[] args) throws IOException, TikaException, SAXException {
        File fileDir = new File("chapter2/TikaDemo/files");
        // listFiles() yields null when the path is not a directory or an I/O error occurs,
        // so both "missing folder" and "cannot list folder" are handled by one null check.
        File[] fileArr = fileDir.isDirectory() ? fileDir.listFiles() : null;
        if (fileArr == null) {
            System.out.println("文件夹不存在，请检查");
            return;
        }

        // The parser and the parse context carry no per-document output, so they are shared.
        Parser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        for (File f : fileArr) {
            if (!f.isFile()) {
                // Sub-directories (and other non-regular entries) cannot be opened as a stream.
                continue;
            }
            // A fresh handler and metadata per document: a shared handler keeps appending,
            // which would make every printout repeat the text of all earlier files.
            BodyContentHandler handler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            // parse() consumes the stream but does not close it, so close it here.
            try (FileInputStream inputStream = new FileInputStream(f)) {
                parser.parse(inputStream, handler, metadata, context);
            }
            System.out.println(f.getName() + ":\n" + handler.toString());
        }
    }

}
